X. muta population genetics from SNPs
Analyzing 2bRAD generated SNPs (XXX loci) for population
structure//genetic connectivity across sites and depth zones in FL
How many reads?
xestoReads = read.delim("../data/xestoRawReadCounts", header = FALSE)
colnames(xestoReads) = c("sample", "reads")
head(xestoReads)
## sample reads
## 1 FK-Xm1-1 70484555
## 2 FK-Xm1-2 102922235
## 3 FK-Xm1-3 59128545
## 4 FK-Xm1-4 72743555
## 5 FK-Xm1-5 119901675
## 6 FK-Xm1-6 61086586
#total reads
sum(xestoReads$reads)
## [1] 2003083840
#average reads/sample
(sum(xestoReads$reads)/366)
## [1] 5472907
xestoFiltReads = read.delim("../data/xestoFiltReadCounts", header = FALSE)
colnames(xestoFiltReads) = c("sample", "reads")
head(xestoFiltReads)
## sample reads
## 1 fk_X001 1945262
## 2 fk_X002 347932
## 3 fk_X003 2704686
## 4 fk_X004 4198543
## 5 fk_X005 3855099
## 6 fk_X006 1978019
#total reads
sum(xestoFiltReads$reads)
## [1] 1345919357
#average reads/sample
(sum(xestoFiltReads$reads)/366)
## [1] 3677375
Identifiying clonal multi-locus genotypes
Dendrogram with clones
Identification of any natural clones using technical replicates as a
baseline for clonality between samples.
cloneBams = read.csv("../data/flXestoMetaData.csv") %>% dplyr::select(-sampleID, -date, -species)# list of bam files
cloneMa = as.matrix(read.table("../data/clones/xestoClones3.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(cloneMa) = list(cloneBams[,1],cloneBams[,1])
clonesHc = hclust(as.dist(cloneMa), "ave")
cloneSites = cloneBams$region
cloneDepth = cloneBams$depthZone
cloneDend = cloneMa %>% as.dist() %>% hclust(., "ave") %>% as.dendrogram()
cloneDData = cloneDend %>% dendro_data()
# Making the branches hang shorter so we can easily see clonal groups
cloneDData$segments$yend2 = cloneDData$segments$yend
for(i in 1:nrow(cloneDData$segments)) {
if (cloneDData$segments$yend2[i] == 0) {
cloneDData$segments$yend2[i] = (cloneDData$segments$y[i] - 0.01)}}
cloneDendPoints = cloneDData$labels
cloneDendPoints$site = cloneSites[order.dendrogram(cloneDend)]
cloneDendPoints$depth=cloneDepth[order.dendrogram(cloneDend)]
rownames(cloneDendPoints) = cloneDendPoints$label
# Making points at the leaves to place symbols for populations
point = as.vector(NA)
for(i in 1:nrow(cloneDData$segments)) {
if (cloneDData$segments$yend[i] == 0) {
point[i] = cloneDData$segments$y[i] - 0.01
} else {
point[i] = NA}}
cloneDendPoints$y = point[!is.na(point)]
techReps = c("FKX014.1","FKX014.2", "FKX014.3", "FKX029.1", "FKX029.2", "FKX121.1", "FKX121.2", "FKX121.3", "FKX159.1", "FKX159.2", "FKX190.1", "FKX190.2", "FKX190.3", "SFX012.1", "SFX012.2", "SFX012.3", "SFX071.1", "SFX071.2", "SFX071.3", "SFX108.1", "SFX108.2", "SFX108.3")
cloneDendPoints$depth = factor(cloneDendPoints$depth)
cloneDendPoints$depth = factor(cloneDendPoints$depth,levels(cloneDendPoints$depth)[c(2,1)])
cloneDendPoints$site = factor(cloneDendPoints$site)
cloneDendPoints$site = factor(cloneDendPoints$site,levels(cloneDendPoints$site)[c(3, 8, 1, 2, 7, 4, 6, 5)])
cloneDendA = ggplot() +
geom_segment(data = segment(cloneDData), aes(x = x, y = y, xend = xend, yend = yend2), linewidth = 0.5) +
geom_point(data = cloneDendPoints, aes(x = x, y = y, fill = site, shape = depth), size = 4, stroke = 0.25) +
#scale_fill_brewer(palette = "Dark2", name = "Population") +
scale_fill_manual(values = flPal, name= "Population")+
scale_shape_manual(values = c(24, 25), name = "Depth Zone")+
geom_hline(yintercept = 0.145, color = pink, lty = 5, linewidth = 0.75) + # creating a dashed line to indicate a clonal distance threshold
geom_text(data = subset(cloneDendPoints, subset = label %in% techReps), aes(x = x, y = (y - .015), label = label), angle = 90) + # spacing technical replicates further from leaf
geom_text(data = subset(cloneDendPoints, subset = !label %in% techReps), aes(x = x, y = (y - .010), label = label), angle = 90) +
labs(y = "Genetic distance (1 - IBS)") +
guides(fill = guide_legend(override.aes = list(shape = 22)))+
theme_classic()
cloneDend = cloneDendA + theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 12, color = "black", angle = 90),
axis.text.y = element_text(size = 10, color = "black"),
axis.line.y = element_line(),
axis.ticks.y = element_line(),
panel.grid = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
plot.background = element_blank(),
legend.key = element_blank(),
legend.title = element_text(size = 12),
legend.text = element_text(size = 10),
legend.position = "bottom")
# cloneDend
ggsave("../figures/rmd/cloneDend.png", plot = cloneDend, height = 8, width = 40, units = "in", dpi = 300)
We removed the technical replicates/clones and re-ran ANGSD for all
the following pop-gen analyses.
Structure and dendrograms
bams = read.csv("../data/flXestoMetaData.csv")[-c(7, 15, 16, 32, 125, 126, 165, 197, 198, 253, 254, 314, 315, 353, 354),] # list of bams files and their populations (tech reps removed)
bams$sample <- gsub("\\.[1-3]*$", "", bams$sample)
bams$region = factor(bams$region)
bams$region = factor(bams$region, levels = levels(bams$region)[c(3, 8, 1, 2, 7, 4, 6, 5)])
bams$depthZone = factor(bams$depthZone)
bams$depthZone = factor(bams$depthZone, levels = levels(bams$depthZone)[c(2,1)])
# ma = as.matrix(read.table("../data/ftl/xestoFTL.ibsMat"))
ma = as.matrix(read.table("../data/xestoNoClones.ibsMat")) # reads in IBS matrix produced by ANGSD
dimnames(ma) = list(bams[,3],bams[,3])
## admixture K = 2
#-------------------------------------
K2ad = read.table("../data/admix/noClones/K2.output") %>% dplyr::select(V6, V7)
K2ad %>% summarise(sum(V6),sum(V7))
## sum(V6) sum(V7)
## 1 301.7527 49.2473
admixK2 = bams %>%
dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>%
cbind(K2ad) %>%
dplyr::rename("Xm1" = "V6", "Xm2" = "V7")
admixK2_melt = melt(admixK2, id = c("sample", "site", "depth", "depthm"))
admixK2$site.x = 1
## admixture K = 3
#-------------------------------------
K3ad = read.table("../data/admix/noClones/K3.output") %>% dplyr::select(V6, V7, V8)
K3ad %>% summarise(sum(V6),sum(V7), sum(V8))
## sum(V6) sum(V7) sum(V8)
## 1 211.2603 43.1265 96.6129
admixK3 = bams %>%
dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>%
cbind(K3ad) %>%
dplyr::rename("Xm1" = "V6", "Xm2" = "V7", "Xm1.2" = "V8") %>%
dplyr::select(order(colnames(.)))
admixK3_melt = melt(admixK3, id = c("sample", "site", "depth", "depthm"))
## ngsAdmix K = 4
#-------------------------------------
K4ad = read.table("../data/admix/noClones/K4.output") %>% dplyr::select(V6, V7, V8, V9)
K4ad %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9))
## sum(V6) sum(V7) sum(V8) sum(V9)
## 1 162.8505 38.5994 81.0511 68.4982
admixK4 = bams %>%
dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>%
cbind(K4ad) %>%
dplyr::rename("Xm1" = "V6", "Xm2" = "V7", "Xm1.2" = "V8", "Xm1.3" = "V9") %>%
dplyr::select(order(colnames(.)))
admixK4_melt = melt(admixK4, id = c("sample", "site", "depth", "depthm"))
## admixture K = 5
#-------------------------------------
K5ad = read.table("../data/admix/noClones/K5.output") %>% dplyr::select(V6, V7, V8, V9, V10)
K5ad %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9), sum(V10))
## sum(V6) sum(V7) sum(V8) sum(V9) sum(V10)
## 1 103.7482 37.3177 71.0979 57.7997 81.0367
admixK5 = bams %>%
dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>%
cbind(K5ad) %>%
dplyr::rename("Xm1" = "V6", "Xm2" = "V7", "Xm1.2" = "V8", "Xm1.3" = "V9", "Xm1.4" = "V10") #%>%
admixK5_melt = melt(admixK5, id = c("sample", "site", "depth", "depthm"))
## construct figure
#-------------------------------------
# ggtree(tr) + geom_nodelab(aes(label = node), hjust = -0.5)
{
tr = hclust(dist(ma),"ave")
p1 = ggtree(tr, layout = "rectangular", size = 0.35) +
geom_point2(aes(subset = (node == 354)), shape = 21, size = 4.5, fill = xestoKColPal[2]) +
geom_point2(aes(subset = (node == 357)), shape = 21, size = 4.5, fill=xestoKColPal[6]) +
geom_point2(aes(subset = (node == 358)), shape = 21, size = 4.5, fill = xestoKColPal[1]) +
geom_cladelabel(node = 354, hjust = 13.35, vjust = -0.1, label = "Xm2", barsize = 0) +
geom_cladelabel(node = 357, hjust = 3.87, vjust = 1.2, label = "Admixed", barsize = 0) +
geom_cladelabel(node = 358, hjust = 7.8, vjust = 17.7, label = "Xm1", barsize = 0)
p2 = p1 + geom_fruit(data=admixK2, geom = geom_bar, mapping = aes(x = 1, y = sample, fill = site), stat='identity', color = "grey25", linewidth = 0.1, pwidth = 0.02, offset = 0.3) +
scale_fill_manual("Site", values = flPal) +
new_scale_fill()
p3 = p2 + geom_fruit(data=admixK2, geom = geom_bar, mapping = aes(x = 1, y = sample, fill = depth), stat='identity', color = "grey25", linewidth = 0.1, pwidth = 0.02, offset = 1.54) +
scale_fill_manual("Depth zone", values = c("#A7E1BCFF", "#414083FF")) +
new_scale_fill()
p4 = p3 + geom_fruit(data=admixK2, geom = geom_bar, mapping = aes(x = 1, y = sample, fill = depthm), stat='identity', color = "grey25", linewidth = 0.1, pwidth = 0.02, offset = 4.02) +
scale_fill_viridis_c("Depth (m)", option = "mako", trans = "reverse") +
new_scale_fill()
p5 = p4 + geom_fruit(data = admixK2_melt, geom=geom_bar, mapping = aes(x = -value, y = sample, fill = variable),
stat='identity', color = "grey25", pwidth = 0.2, offset = 9.0, linewidth = 0.1) +
scale_fill_manual("Lineage",values = xestoKColPal[1:5])
p6 = p5 + geom_fruit(data = admixK3_melt, geom=geom_bar, mapping = aes(x = -value, y = sample, fill = variable),
stat='identity', color = "grey25", pwidth = 0.12,
offset = 18.515, linewidth = 0.1)
p7 = p6 + geom_fruit(data = admixK4_melt, geom=geom_bar, mapping = aes(x = -value, y = sample, fill = variable),
stat='identity', color = "grey25", pwidth = 0.12,
offset = 37.757, linewidth = 0.1)
p8 = p7 + geom_fruit(data = admixK5_melt, geom=geom_bar, mapping = aes(x = -value, y = sample, fill = variable),
stat='identity', color = "grey25", pwidth = 0.12,
offset = 76.241, linewidth = 0.1) +
coord_cartesian(xlim = c(-1.13, 0.72)) +
theme_tree(legend.position = c(0.96, 0.5))
}
Population structure
PCA
cov = read.table("../data/pcangsd/flXestoPcangsd.cov") %>% as.matrix()
pcAdmix = read.table("../data/admix/noClones/K4.output") %>% dplyr::select(V6, V7, V8, V9)
pcAdmix %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9))
## sum(V6) sum(V7) sum(V8) sum(V9)
## 1 162.8505 38.5994 81.0511 68.4982
pcAdmix = pcAdmix %>% dplyr::rename("cluster1" = "V6", "cluster2" = "V7", "cluster3" = "V8", "cluster4" = "V9") %>% dplyr::select(order(colnames(.)))
pcaEig = eigen(cov)
xestoPcaVar = pcaEig$values/sum(pcaEig$values)*100
head(xestoPcaVar)
## [1] 5.839551 2.713174 2.008476 1.797844 1.294318 1.241920
pcangsd = bams %>% dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>% cbind(pcAdmix)
pcangsd$sitedepth = as.factor(paste(pcangsd$site, pcangsd$depth, sep = " "))
pcangsd$sitedepth = factor(pcangsd$sitedepth)
pcangsd$sitedepth = factor(pcangsd$sitedepth, levels(pcangsd$sitedepth)[c(3, 12, 1, 2, 11, 10, 5, 4, 9, 8, 7, 6)])
pcangsd$PC1 = pcaEig$vectors[,1]
pcangsd$PC2 = pcaEig$vectors[,2]
pcangsd$PC3 = pcaEig$vectors[,3]
pcangsd$PC4 = pcaEig$vectors[,4]
pcangsdClustK2 = admixK2 %>% mutate(clusterK2 = ifelse(Xm1 >=0.75, "Xm1", ifelse(Xm2 >= 0.75, "Xm2", "Admixed")))
pcangsdClustK2$clusterK2 = as.factor(pcangsdClustK2$clusterK2)
levels(pcangsdClustK2$clusterK2)
## [1] "Admixed" "Xm1" "Xm2"
pcangsdClustK2$clusterK2 = factor(pcangsdClustK2$clusterK2, levels = levels(pcangsdClustK2$clusterK2)[c(2,3,1)])
pcangsdClustK4 = admixK4 %>% mutate(clusterK4 = ifelse(Xm1 >=0.75, "Xm1", ifelse(Xm2 >= 0.75, "Xm2", ifelse(Xm1.2 >= 0.75, "Xm1.2", ifelse(Xm1.3 >= 0.75, "Xm1.3", "Admixed")))))
pcangsdClustK4$clusterK4 = as.factor(pcangsdClustK4$clusterK4)
levels(pcangsdClustK4$clusterK4)
## [1] "Admixed" "Xm1" "Xm1.2" "Xm1.3" "Xm2"
pcangsdClustK4$clusterK4 = factor(pcangsdClustK4$clusterK4, levels = levels(pcangsdClustK4$clusterK4)[c(2,5,3,4,1)])
pcangsd = pcangsd %>% mutate(clusterK2 = pcangsdClustK2$clusterK2, clusterK4 = pcangsdClustK4$clusterK4)
# bamsClusters = pcangsd %>% dplyr::select(sample, cluster) %>% dplyr::arrange(sample)
# bamsSamples = read.delim("../data/snps/bamsNoClones", header = FALSE)
# bamsClusters$sample = bamsSamples$V1
# bamsClusters = as.data.frame(bamsClusters)
# write.table(x = bamsClusters, file = "../data/snps/bamsClusters", sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE)
pcangsd = merge(pcangsd, aggregate(cbind(mean.x = PC1, mean.y = PC2)~sitedepth, pcangsd, mean), by="sitedepth")
#
# set.seed(942)
#
# pcPerm = adonis2(pcangsd[6:9] ~ site*depth*clusterK2, data = pcangsd, permutations = 999, method = "euclidean")
#
# pcPerm
pcangsd %>% group_by(clusterK4) %>% dplyr::summarize(n = n(), prop = n*0.75)
## # A tibble: 5 × 3
## clusterK4 n prop
## <fct> <int> <dbl>
## 1 Xm1 59 44.2
## 2 Xm2 19 14.2
## 3 Xm1.2 17 12.8
## 4 Xm1.3 12 9
## 5 Admixed 244 183
pcangsd %>% group_by(depth,clusterK4) %>% dplyr::summarize(n = n())
## `summarise()` has grouped output by 'depth'. You can override using the `.groups`
## argument.
## # A tibble: 9 × 3
## # Groups: depth [2]
## depth clusterK4 n
## <fct> <fct> <int>
## 1 Shallow Xm1 55
## 2 Shallow Xm2 19
## 3 Shallow Xm1.2 15
## 4 Shallow Xm1.3 3
## 5 Shallow Admixed 148
## 6 Mesophotic Xm1 4
## 7 Mesophotic Xm1.2 2
## 8 Mesophotic Xm1.3 9
## 9 Mesophotic Admixed 96
Plot PCA
#
pcaPlotSA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.25) +
geom_vline(xintercept = 0, color = "gray90", size = 0.25) +
geom_point(data = pcangsd, aes(x = PC1, y = PC2, fill = site, shape = depth), color = "black", stroke = 0.25, size = 4, alpha = 0.5, show.legend = FALSE) +
geom_point(data = pcangsd, aes(x = mean.x, y = mean.y, fill = site, shape = depth), color = "black", size = 5, alpha = 1, stroke = 0.5) +
scale_shape_manual(values = c(21, 23), name = "Depth Zone") +
scale_fill_manual(values = flPal, name = "Site") +
scale_color_manual(values = flPal, name = "Site") +
labs(x = paste0("PC 1 (", format(round(xestoPcaVar[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(xestoPcaVar[2], 1), nsmall = 1), " %)")) +
guides(shape = guide_legend(override.aes = list(size = 3, stroke = 0.25, alpha = NA), order = 2, ncol = 1), fill = guide_legend(override.aes = list(shape = 22, size = 3, fill = flPal, alpha = NA), order = 1, ncol = 1)) +
theme_bw()
pcaPlotS = pcaPlotSA +
pcaTheme +
theme(legend.position = c(0.15, 0.24))
pcaPlotK4A = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
geom_point(data = pcangsd, aes(x = PC1, y = PC2, fill = clusterK4, shape = depth), color = "black", size = 4, alpha = 1, show.legend = TRUE) +
scale_shape_manual(values = c(21, 23), name = "Depth Zone") +
scale_fill_manual(values = c(xestoKColPal[c(1:4,6)]), name = "Lineage") +
labs(x = paste0("PC 1 (", format(round(xestoPcaVar[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(xestoPcaVar[2], 1), nsmall = 1), " %)")) +
guides(shape = "none", fill = guide_legend(override.aes = list(shape = 22, size = 3, fill = c(xestoKColPal[c(1:4,6)]), alpha = NA), order = 1, ncol = 1))+
theme_bw()
pcaPlotK4 = pcaPlotK4A +
pcaTheme +
theme(legend.position = c(0.12, 0.15))
pcaPlotK2A = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.25) +
geom_vline(xintercept = 0, color = "gray90", size = 0.25) +
geom_point(data = pcangsd, aes(x = PC1, y = PC2, fill = clusterK2, shape = depth), color = "black", size = 4, show.legend = TRUE) +
scale_shape_manual(values = c(21, 23), name = "Depth Zone", ) +
scale_fill_manual(values = c(xestoKColPal[c(1,2,6)]), name = "Lineage") +
labs(x = paste0("PC 1 (", format(round(xestoPcaVar[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(xestoPcaVar[2], 1), nsmall = 1), " %)")) +
guides(shape = "none", fill = guide_legend(override.aes = list(shape = 22, size = 3, fill = xestoKColPal[c(1,2,6)], alpha = NA), order = 1, ncol = 1)) +
theme_bw()
pcaPlotK2 = pcaPlotK2A +
pcaTheme +
theme(legend.position = c(0.12, 0.15))
pcaPlotK2

Put all plots together
pcaPlots = (pcaPlotS | pcaPlotK2 | pcaPlotK4) +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = 18),
panel.background = element_rect(fill = "white"),
legend.spacing = unit(-5, "pt"),
legend.key = element_blank(),
legend.background = element_blank())
pcaPlots

fig2 = (p8 / pcaPlots) +
plot_layout(heights = c(1.75, 1)) +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = 20),
legend.spacing = unit(-5, "pt"),
legend.key = element_blank(),
legend.background = element_blank())
ggsave("../figures/figure2.png", plot = fig2, height = 12, width = 14, units = "in", dpi = 300)
ggsave("../figures/figure2.svg", plot = fig2, height = 12, width = 14, units = "in", dpi = 300)

Genetic Diversity
Lineage demographics
K2ad = read.table("../data/admix/noClones/K2.output") %>% dplyr::select(V6, V7)
K2ad %>% summarise(sum(V6),sum(V7))
## sum(V6) sum(V7)
## 1 301.7527 49.2473
admixK2 = bams %>%
dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>%
cbind(K2ad) %>%
dplyr::rename("Xm1" = "V6", "Xm2" = "V7")
k2lineage = admixK2 %>% mutate(cluster = ifelse(Xm1 >= .75, "Xm1", ifelse(Xm2 >= .75, "Xm2", "Admixed")))
K3ad = read.table("../data/admix/noCLones/K3.output") %>% dplyr::select(V6, V7, V8)
K3ad %>% summarise(sum(V6),sum(V7), sum(V8))
## sum(V6) sum(V7) sum(V8)
## 1 211.2603 43.1265 96.6129
admixK3 = bams %>%
dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>%
cbind(K3ad) %>%
dplyr::rename("Xm1" = "V6", "Xm2" = "V7", "Xm3" = "V8")
k3lineage = admixK3 %>% mutate(cluster = ifelse(Xm1 >= .75, "Xm1", ifelse(Xm2 >= .75, "Xm2", ifelse(Xm3 >= .75, "Xm3", "Admixed"))))
K4ad = read.table("../data/admix/noClones/K4.output") %>% dplyr::select(V6, V7, V8, V9)
K4ad %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9))
## sum(V6) sum(V7) sum(V8) sum(V9)
## 1 162.8505 38.5994 81.0511 68.4982
admixK4 = bams %>%
dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>%
cbind(K4ad) %>%
dplyr::rename("Xm1" = "V6", "Xm2" = "V7", "Xm3" = "V8", "Xm4" = "V9")
k4lineage = admixK4 %>% mutate(cluster = ifelse(Xm1 >= .75, "Xm1", ifelse(Xm2 >= .75, "Xm2", ifelse(Xm3 >= .75, "Xm3", ifelse(Xm4 >= .75, "Xm4","Admixed")))))
k2Bams = k2lineage %>% select(sample, cluster)
k2Bams$sample = gsub("FK", "fk_", k2Bams$sample)
k2Bams$sample = gsub("SF", "sf_", k2Bams$sample)
k2Bams$sample = paste(k2Bams$sample, ".trim.bt2.bam", sep ="")
write_delim(k2Bams, "../data/k2bams", col_names = FALSE)
k3Bams = k3lineage %>% select(sample, cluster)
k3Bams$sample = gsub("FK", "fk_", k3Bams$sample)
k3Bams$sample = gsub("SF", "sf_", k3Bams$sample)
k3Bams$sample = paste(k3Bams$sample, ".trim.bt2.bam", sep ="")
write_delim(k3Bams, "../data/k3bams", col_names = FALSE)
k4Bams = k4lineage %>% select(sample, cluster)
k4Bams$sample = gsub("FK", "fk_", k4Bams$sample)
k4Bams$sample = gsub("SF", "sf_", k4Bams$sample)
k4Bams$sample = paste(k4Bams$sample, ".trim.bt2.bam", sep ="")
write_delim(k4Bams, "../data/k4bams", col_names = FALSE)
k2lineage %>% group_by(cluster) %>% summarise(n = n(), perc = n*0.75)
## # A tibble: 3 × 3
## cluster n perc
## <chr> <int> <dbl>
## 1 Admixed 40 30
## 2 Xm1 292 219
## 3 Xm2 19 14.2
k3lineage %>% group_by(cluster) %>% summarise(n = n(), perc = n*0.75)
## # A tibble: 4 × 3
## cluster n perc
## <chr> <int> <dbl>
## 1 Admixed 162 122.
## 2 Xm1 138 104.
## 3 Xm2 20 15
## 4 Xm3 31 23.2
k4lineage %>% group_by(cluster) %>% summarise(n = n(), perc = n*0.75)
## # A tibble: 5 × 3
## cluster n perc
## <chr> <int> <dbl>
## 1 Admixed 244 183
## 2 Xm1 59 44.2
## 3 Xm2 19 14.2
## 4 Xm3 17 12.8
## 5 Xm4 12 9
hetK2Data = read.delim("../data/het/flXestoK2Het", header = FALSE, sep = " ") %>% rename("k2Het" = "V2") %>% mutate(sample = bams$sample) %>% left_join(pcangsdClustK2) %>% select(sample, k2Het, clusterK2)
## Joining with `by = join_by(sample)`
hetK4Data = read.delim("../data/het/flXestoK4Het", header = FALSE, sep = " ") %>% rename("k4Het" = "V2")%>% mutate(sample = bams$sample) %>% left_join(pcangsdClustK4) %>% select(sample, k4Het, clusterK4)
## Joining with `by = join_by(sample)`
xestoHet = bams %>% left_join(hetK2Data) %>% left_join(hetK4Data)
## Joining with `by = join_by(sample)`
## Joining with `by = join_by(sample)`
hetK2AOV = welch_anova_test(xestoHet, k2Het ~ clusterK2)
hetK2PH = games_howell_test(xestoHet, k2Het ~ clusterK2)
hetK2PH$p.adj
## [1] 0.00000034800000 0.00000000000549 0.00003810000000
hetK2Letters = data.frame(x = factor(c("Xm1", "Xm2", "Admixed")), y = c(0.0025, 0.0025, 0.0025), grp = c("a", "b", "c"))
hetK4AOV = welch_anova_test(xestoHet, k4Het ~ clusterK4)
hetK4PH = games_howell_test(xestoHet, k4Het ~ clusterK4)
hetK4PH$p.adj
## [1] 0.00000084200 0.00006550000 0.07400000000 0.00000000782 0.00001210000 0.00000310000
## [7] 0.00000596000 0.35100000000 0.41500000000 0.93400000000
hetK4Letters = data.frame(x = factor(c("Xm1", "Xm2", "Xm1.2", "Xm1.3","Admixed")), y = c(0.0025, 0.0025, 0.0025, 0.0025, 0.0025), grp = c("a", "b", "c", "ac", "cd"))
hetPlotK2 = ggplot(data = xestoHet, aes(x = clusterK2, y = k2Het)) +
geom_violin(aes(fill = clusterK2, group = clusterK2), adjust = 1, linewidth = 0.4, color = "black", alpha = 1, trim = TRUE, scale = "area", width = 1.2) +
geom_boxplot(aes(fill = clusterK2, group = clusterK2), width = 0.2, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.4, alpha = 0.5) + xlab("Lineage") +
geom_text(data = hetK2Letters, aes(x = x, y = y, label = grp), size = 4) +
annotate(geom = "text", x = 2.95, y = 0, label = sprintf("italic(F) == '%0.2f' * ';' ~ italic(p) < 0.001", hetK2AOV$statistic), size = 3, parse = TRUE) +
scale_fill_discrete(type = xestoKColPal[c(1,2,6)], name = "Lineage") +
xlab("Lineage") +
ylab("Heterozygosity") +
scale_y_continuous(breaks = seq(0, 0.0025, 0.0005)) +
coord_cartesian(expand = TRUE, xlim = c(0.85, 3.15), ylim = c(0, 0.0025)) +
theme_bw() +
violinTheme
hetPlotK2

hetPlotK4 = ggplot(data = xestoHet, aes(x = clusterK4, y = k4Het)) +
geom_violin(aes(fill = clusterK4, group = clusterK4), adjust = 1, linewidth = 0.4, color = "black", alpha = 1, trim = TRUE, scale = "area", width = 1.2) +
geom_boxplot(aes(fill = clusterK4, group = clusterK4), width = 0.2, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.4, alpha = 0.5) + xlab("Lineage") +
geom_text(data = hetK4Letters, aes(x = x, y = y, label = grp), size = 4) +
annotate(geom = "text", x = 4.5, y = 0, label = sprintf("italic(F) == '%0.2f' * ';' ~ italic(p) < 0.001", hetK4AOV$statistic), size = 3, parse = TRUE) +
scale_fill_discrete(type = xestoKColPal[c(1,2,3,4,6)], name = "Lineage") +
xlab("Lineage") +
ylab("Heterozygosity") +
scale_y_continuous(breaks = seq(0, 0.0025, 0.0005)) +
coord_cartesian(expand = TRUE, xlim = c(0.85, 5.15)) +
theme_bw() +
violinTheme
hetPlotK4

pop.order = c("Xm1.3", "Xm1.2", "Xm2", "Xm1")
# reads in fst
fstMa1 <- read.delim("../data/xestoFstK4.out") %>% dplyr::select(-fst) %>% df_to_pw_mat(., "pop1", "pop2", "weightedFst")
fstMa1
## Xm1 Xm2 Xm1.2 Xm1.3
## Xm1 0.000000 0.136042 0.033682 0.020161
## Xm2 0.136042 0.000000 0.113342 0.119521
## Xm1.2 0.033682 0.113342 0.000000 0.036817
## Xm1.3 0.020161 0.119521 0.036817 0.000000
fstMa = fstMa1
upperTriangle(fstMa, byrow = TRUE) <- lowerTriangle(fstMa)
fstMa <- fstMa[,pop.order] %>%
.[pop.order,]
fstMa[upper.tri(fstMa)] <- NA
fstMa <- as.data.frame(fstMa)
# rearrange and reformat matrix
fstMa$Pop = factor(row.names(fstMa), levels = unique(pop.order))
# melt matrix data (turn from matrix into long dataframe)
fst = melt(fstMa, id.vars = "Pop", value.name = "Fst", variable.name = "Pop2", na.rm = FALSE)
fst$Fst = round(fst$Fst, 3)
fstHeatmap = ggplot(data = fst %>% filter(Fst != 0), aes(Pop, Pop2, fill = as.numeric(as.character(Fst)))) +
geom_tile(color = "white") +
geom_segment(data = fst, aes(x = 0.475, xend = 0.15, y = Pop2, yend = Pop2, color = Pop2), size = 20) + #colored boxes for Y-axis labels
geom_segment(data = fst, aes(x = Pop, xend = Pop, y = 0.2, yend = 0.475, color = Pop), size = 22.5) + #colored boxes for X-axis labels
scale_color_manual(values = rev(xestoKColPal[c(1:4)]), guide = NULL) +
scale_fill_gradient(low = "white", high = "gray40", limit = c(0, 0.22), space = "Lab", name = expression(paste(italic("F")[ST])), na.value = NA, guide = "colourbar") +
geom_text(data = fst %>% filter(Fst != 0), aes(Pop, Pop2, label = Fst), color = "black", size = 3.5) +
guides(fill = guide_colorbar(barwidth = 7.5, barheight = 0.75, title.position = "top", title.hjust = 0.5, direction = "horizontal", ticks.colour = "black", frame.colour = "black")) +
scale_y_discrete(position = "left", limits = levels(fst$Pop2)) +
scale_x_discrete(limits = rev(levels(fst$Pop2)[c(1:4)])) +
coord_cartesian(xlim = c(1, 4), ylim = c(1, 4), clip = "off") +
theme_minimal() +
fstTheme
fstHeatmap

Nucleotide diversity
npgList = list(read_tsv("../data/k2Xm1.thetas.idx.pestPG", show_col_types = FALSE) %>% mutate(K = "*K*=2", lineage = "Xm1"),
read_tsv("../data/k2Xm2.thetas.idx.pestPG", show_col_types = FALSE) %>% mutate(K = "*K*=2", lineage = "Xm2"),
read_tsv("../data/k4Xm1.thetas.idx.pestPG", show_col_types = FALSE) %>% mutate(K = "*K*=4", lineage = "Xm1"),
read_tsv("../data/k4Xm2.thetas.idx.pestPG", show_col_types = FALSE) %>% mutate(K = "*K*=4", lineage = "Xm2"),
read_tsv("../data/k4Xm3.thetas.idx.pestPG", show_col_types = FALSE) %>% mutate(K = "*K*=4", lineage = "Xm3"),
read_tsv("../data/k4Xm4.thetas.idx.pestPG", show_col_types = FALSE) %>% mutate(K = "*K*=4", lineage = "Xm4"))
piAll = purrr::reduce(npgList, rbind) %>%
dplyr::group_by(K, lineage) %>%
filter(tP != 0) %>%
mutate(tPps = tP/nSites) %>%
dplyr::summarize(tPps = mean(tPps))
## `summarise()` has grouped output by 'K'. You can override using the `.groups` argument.
piAll$Ne = (piAll$tPps)/(4*2e-8)
piAll
## # A tibble: 6 × 4
## # Groups: K [2]
## K lineage tPps Ne
## <chr> <chr> <dbl> <dbl>
## 1 *K*=2 Xm1 0.00407 50861.
## 2 *K*=2 Xm2 0.00540 67442.
## 3 *K*=4 Xm1 0.00438 54742.
## 4 *K*=4 Xm2 0.00659 82387.
## 5 *K*=4 Xm3 0.00467 58378.
## 6 *K*=4 Xm4 0.00434 54191.
piAll$K = factor(piAll$K)
piAll$lineage = factor(piAll$lineage)
nuclDivPlot = ggplot(piAll, aes(x = K, y = tPps, fill = lineage, group - lineage)) +
geom_rect(aes(xmin = 1.385, xmax = 10, ymin = -1, ymax = 100000), fill = "black", color = NA, alpha = 0.03, linewidth = 0) +
geom_bar(position = position_dodge2(preserve = "single"), stat = "identity", color = "black") +
scale_fill_manual(values = xestoKColPal) +
geom_text(position = position_dodge2(preserve = "single", width = 0.9), aes(y = tPps-.0015, label= scales::comma(round(Ne,0)), hjust = 0, fontface = "bold"), angle = 90, color = "white", ) +
labs(x = bquote(~italic(K)), y = bquote("Nucleotide diversity ("*pi*")")) +
coord_cartesian(xlim = c(0.4, 2.8), ylim = c(0, 0.007), expand = FALSE) +
theme_bw() +
violinTheme +
theme(axis.text.x = ggtext::element_markdown())
nuclDivPlot

Heterozygosity
hetPlots = hetPlotK2 + hetPlotK4 + nuclDivPlot + fstHeatmap +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = 14),
legend.text = element_text(size = 8),
legend.title = element_text(size = 10))
hetPlots

ggsave("../figures/figure5.png", plot = hetPlots, width = 8, height = 7, units = "in", dpi = 300)
Size class structure
xmSizeData = read.csv("../data/xestoSEFLSampleData.csv") %>% mutate("sample" = tubeID, "site" = region) %>% dplyr::select(-c("region", "species", "sampleID", "tubeID", "depthFt")) %>% dplyr::relocate(sample, .before = latDD ) %>% dplyr::relocate(site, .before = latDD )
xmSize = xmSizeData %>% left_join(pcangsd) %>% left_join(admixK2) %>% dplyr::select(sample, site, depthm, latDD, Xm1, Xm2, PC1, PC2, PC3, clusterK2, clusterK4, h, od, bd) %>% dplyr::mutate(volume = ((1/12)*pi*h*(od^2 + od * bd + bd^2))) %>% mutate(sizeClass = case_when(volume <= 143.13 ~ "I",
volume <= 1077.13 ~ "II",
volume <= 5666.32 ~ "III",
volume <= 17383.97 ~ "IV",
volume > 17383.97 ~ "V"))
## Joining with `by = join_by(sample, site)`
## Joining with `by = join_by(sample, site, depth, depthm)`
xmSize$sizeClass <- ordered(xmSize$sizeClass, levels = c("I", "II", "III", "IV", "V"))
xmSize$site = factor(xmSize$site)
xmSize$site = factor(xmSize$site, levels = levels(xmSize$site)[c(3,4,1,2)])
volumePlot = ggplot(data = xmSize, aes(x = log(volume))) +
# geom_density()+
geom_histogram(bins=5)+
scale_fill_manual(values = xestoKColPal[c(1,2,6)], name = "Lineage") +
# scale_x_discrete(drop = FALSE)+
xlab("log(Volume)") +
ylab("Density") +
theme_bw() +
violinTheme +
theme(legend.position = "right")
volumePlot

# By Lineage
xmTableLineage = table(xmSize$sizeClass, xmSize$clusterK2)
fisher.test(xmTableLineage, simulate.p.value = TRUE, B = 100000)
##
## Fisher's Exact Test for Count Data with simulated p-value (based on 100000
## replicates)
##
## data: xmTableLineage
## p-value = 0.00001
## alternative hypothesis: two.sided
## Proportions per lineage
prop_lineage <- prop.table(xmTableLineage, margin = 2)
round(prop_lineage, 3)
##
## Xm1 Xm2 Admixed
## I 0.000 0.000 0.000
## II 0.022 0.316 0.250
## III 0.157 0.579 0.250
## IV 0.180 0.053 0.250
## V 0.640 0.053 0.250
## Expected counts under independence
row_totals <- rowSums(xmTableLineage)
col_totals <- colSums(xmTableLineage)
grand_total <- sum(xmTableLineage)
expected <- outer(row_totals, col_totals) / grand_total
round(expected, 2)
## Xm1 Xm2 Admixed
## I 0.00 0.00 0.0
## II 8.16 1.74 1.1
## III 20.77 4.43 2.8
## IV 14.83 3.17 2.0
## V 45.24 9.66 6.1
## Difference between observed and expected
### Neg under-representation, Pos over-
diffLineage <- xmTableLineage - expected
diffLineage
##
## Xm1 Xm2 Admixed
## I 0.000000 0.000000 0.000000
## II -6.158333 4.258333 1.900000
## III -6.766667 6.566667 0.200000
## IV 1.166667 -2.166667 1.000000
## V 11.758333 -8.658333 -3.100000
# By Site
xmTableSite = table(xmSize$sizeClass, xmSize$site)
fisher.test(xmTableSite, simulate.p.value = TRUE, B = 100000)
##
## Fisher's Exact Test for Count Data with simulated p-value (based on 100000
## replicates)
##
## data: xmTableSite
## p-value = 0.00001
## alternative hypothesis: two.sided
## Proportions per site
prop_lineage <- prop.table(xmTableSite, margin = 2)
round(prop_lineage, 3)
##
## Jupiter West Palm Boynton Ft. Lauderdale
## I 0.000 0.000 0.000 0.000
## II 0.167 0.000 0.000 0.200
## III 0.300 0.033 0.067 0.533
## IV 0.233 0.033 0.233 0.167
## V 0.300 0.933 0.700 0.100
# Expected counts under independence
row_totals <- rowSums(xmTableSite)
col_totals <- colSums(xmTableSite)
grand_total <- sum(xmTableSite)
expected <- outer(row_totals, col_totals) / grand_total
round(expected, 2)
## Jupiter West Palm Boynton Ft. Lauderdale
## I 0.00 0.00 0.00 0.00
## II 2.75 2.75 2.75 2.75
## III 7.00 7.00 7.00 7.00
## IV 5.00 5.00 5.00 5.00
## V 15.25 15.25 15.25 15.25
#Difference between observed and expected
##Neg under-representation, Pos over-
diffSite <- xmTableSite - expected
diffSite
##
## Jupiter West Palm Boynton Ft. Lauderdale
## I 0.00 0.00 0.00 0.00
## II 2.25 -2.75 -2.75 3.25
## III 2.00 -6.00 -5.00 9.00
## IV 2.00 -4.00 2.00 0.00
## V -6.25 12.75 5.75 -12.25
xmLm = lmer(log(volume) ~ Xm1 + (1 | site), data = xmSize)
# xmLm = lmer(volume ~ Xm1 + (1 | latDD), data = xmSize)
qqnorm(residuals(xmLm))
qqline(residuals(xmLm))

xmLm2 = lmer(log(volume) ~ Xm1 + depthm + (1 | site), data = xmSize)
qqnorm(residuals(xmLm2))
qqline(residuals(xmLm2))

check_collinearity(xmLm2)
## # Check for Multicollinearity
##
## Low Correlation
##
## Term VIF VIF 95% CI adj. VIF Tolerance Tolerance 95% CI
## Xm1 1.02 [1.00, 35.94] 1.01 0.98 [0.03, 1.00]
## depthm 1.02 [1.00, 35.94] 1.01 0.98 [0.03, 1.00]
anova(xmLm, xmLm2)
## refitting model(s) with ML (instead of REML)
## Data: xmSize
## Models:
## xmLm: log(volume) ~ Xm1 + (1 | site)
## xmLm2: log(volume) ~ Xm1 + depthm + (1 | site)
## npar AIC BIC logLik -2*log(L) Chisq Df Pr(>Chisq)
## xmLm 4 396.01 407.16 -194.00 388.01
## xmLm2 5 396.42 410.35 -193.21 386.42 1.5949 1 0.2066
summary(xmLm)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(volume) ~ Xm1 + (1 | site)
## Data: xmSize
##
## REML criterion at convergence: 387.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.91177 -0.72401 0.00932 0.67260 2.53704
##
## Random effects:
## Groups Name Variance Std.Dev.
## site (Intercept) 0.8318 0.912
## Residual 1.3706 1.171
## Number of obs: 120, groups: site, 4
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 8.0097 0.5795 13.823
## Xm1 1.9081 0.4399 4.337
##
## Correlation of Fixed Effects:
## (Intr)
## Xm1 -0.589
#effect size, converted from log
exp(fixef(xmLm)["Xm1"])
## Xm1
## 6.739935
coef <- fixef(xmLm)["Xm1"]
fc <- exp(coef)
ci <- exp(confint(xmLm, parm = "Xm1", method = "Wald"))
# R² values
r2vals <- r.squaredGLMM(xmLm)
# Semi-partial R²
r2beta_vals <- r2beta(xmLm, method = "nsj")
report <- data.frame(
Term = "Xm1 (Xm2 vs Xm1)",
Estimate = round(coef, 3),
SE = round(summary(xmLm)$coefficients["Xm1", "Std. Error"], 3),
t_value = round(summary(xmLm)$coefficients["Xm1", "t value"], 3),
FoldChange = round(fc, 2),
CI_lower = round(ci[1], 2),
CI_upper = round(ci[2], 2),
R2_marginal = round(r2vals[1], 2),
R2_conditional = round(r2vals[2], 2),
R2_partial = round(r2beta_vals$Rsq[2], 2)
)
print(report)
## Term Estimate SE t_value FoldChange CI_lower CI_upper R2_marginal
## Xm1 Xm1 (Xm2 vs Xm1) 1.908 0.44 4.337 6.74 2.85 15.96 0.18
## R2_conditional R2_partial
## Xm1 0.49 0.18
# Random effects
ranef_summary <- as.data.frame(VarCorr(xmLm))
print(ranef_summary)
## grp var1 var2 vcov sdcor
## 1 site (Intercept) <NA> 0.8317721 0.9120154
## 2 Residual <NA> <NA> 1.3705975 1.1707252
xmSizePred <- data.frame(
Xm1 = seq(min(xmSize$Xm1), max(xmSize$Xm1), length.out = 100)
)
# Marginal predictions (fixed effects only)
xmSizePred$pred_marginal <- predict(xmLm, newdata = xmSizePred, re.form = NA)
# Conditional predictions (fixed + random effects)
# This requires making predictions for each site
xmSize$pred_conditional <- predict(xmLm, newdata = xmSize)
xmList = read_csv("../data/flXestoMetaData.csv")
## Rows: 366 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): sampleID, date, sample, region, site, species, depthZone
## dbl (4): latDD, longDD, depthFt, depthM
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
xmList$sample = gsub("\\.[1-3]*$", "", xmList$sample)
xmList = xmList %>% select(-site) %>% dplyr::left_join(xmSize) %>% dplyr::filter(clusterK2 == "Admixed")
## Joining with `by = join_by(sample, latDD)`
xmList
## # A tibble: 12 × 25
## sampleID date sample region species latDD longDD depthFt depthM depthZone site
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <fct>
## 1 02-IX-20-3-0… 9/2/… SFX025 West … Xestos… 26.7 -80.0 51 15.5 Shallow West…
## 2 10-IX-20-1-0… 9/10… SFX034 Jupit… Xestos… 26.9 -80.0 66 20.1 Shallow Jupi…
## 3 10-IX-20-2-0… 9/10… SFX042 Jupit… Xestos… 26.9 -80.0 68 20.7 Shallow Jupi…
## 4 10-IX-20-2-0… 9/10… SFX043 Jupit… Xestos… 26.9 -80.0 71 21.6 Shallow Jupi…
## 5 10-IX-20-2-0… 9/10… SFX045 Jupit… Xestos… 26.9 -80.0 71 21.6 Shallow Jupi…
## 6 11-XII-20-1-… 12/1… SFX062 Ft. L… Xestos… 26.2 -80.1 17 5.2 Shallow Ft. …
## 7 11-XII-20-2-… 12/1… SFX073 Ft. L… Xestos… 26.1 -80.1 21 6.4 Shallow Ft. …
## 8 11-XII-20-2-… 12/1… SFX080 Ft. L… Xestos… 26.1 -80.1 20 6.1 Shallow Ft. …
## 9 11-XII-20-3-… 12/1… SFX081 Ft. L… Xestos… 26.1 -80.1 25 7.6 Shallow Ft. …
## 10 11-XII-20-3-… 12/1… SFX083 Ft. L… Xestos… 26.1 -80.1 23 7 Shallow Ft. …
## 11 21-I-21-3-004 1/21… SFX114 Boynt… Xestos… 26.5 -80.0 54 16.5 Shallow Boyn…
## 12 21-I-21-3-009 1/21… SFX119 Boynt… Xestos… 26.5 -80.0 60 18.3 Shallow Boyn…
## # ℹ 14 more variables: depthm <dbl>, Xm1 <dbl>, Xm2 <dbl>, PC1 <dbl>, PC2 <dbl>,
## # PC3 <dbl>, clusterK2 <fct>, clusterK4 <fct>, h <dbl>, od <dbl>, bd <dbl>,
## # volume <dbl>, sizeClass <ord>, pred_conditional <dbl>
xmGLMMplot = ggplot(xmSize, aes(x = Xm1, y = log(volume))) +
geom_point(aes(color = site), alpha = 0.6, size = 2.5) +
# Add a line for the overall marginal effect (fixed effects)
geom_line(data = xmSizePred, aes(x = Xm1, y = pred_marginal),
linetype = "dashed", color = "black", linewidth = 1, ) +
# Add a line for the conditional prediction (fixed + random effects) for each site
geom_line(aes(y = pred_conditional, group = site,
color = site), linewidth = 0.8) +
labs(x = "Admixture proportion (Xm1)",
y = "Log(Volume)") +
scale_color_manual(name = "Site", values = flPal)+
theme_bw() +
violinTheme
xmGLMMplot

sizeClassK2 = ggplot(data = xmSize, aes(x = sizeClass, fill = clusterK2, group = clusterK2)) +
# geom_histogram(bins=5)+
geom_bar(color = "grey25", linewidth= 0.25) +
scale_fill_manual(values = xestoKColPal[c(1,2,6)], name = "Lineage") +
scale_x_discrete(drop = FALSE)+
xlab("Size class") +
ylab("Count") +
theme_bw() +
violinTheme +
theme(legend.position = "right")
sizeClassK2

sizeClassK2site = ggplot(data = xmSize, aes(x = sizeClass, fill = site, group = site)) +
# geom_density(alpha = 0.25, adjust = 1.2) +
geom_bar(color = "grey25", linewidth= 0.25) +
scale_fill_manual(values = flPal, name = "Site") +
scale_x_discrete(drop = FALSE) +
xlab("Size class") +
ylab("Count") +
theme_bw() +
violinTheme +
theme(legend.position = "right")
sizeClassK2site

# xmPics =image_read("../figures/xmPics.png") %>% image_ggplot()
xmSizeClassPlots = (sizeClassK2 + sizeClassK2site)/xmGLMMplot +
plot_layout(heights = c(1, 1), guides = "collect") +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = 20),
#legend.spacing = unit(-5, "pt"),
legend.key = element_blank(),
legend.background = element_blank())
ggsave("../figures/figure4.png", plot = xmSizeClassPlots, dpi =300, height = 16, width = 20, units = "cm")
Hybridization
K3ad = read.table("../data/admix/noClones/K3.output") %>% dplyr::select(V6, V7, V8)
K3ad %>% summarise(sum(V6),sum(V7), sum(V8))
## sum(V6) sum(V7) sum(V8)
## 1 211.2603 43.1265 96.6129
admixK3 = bams %>%
dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>%
cbind(K3ad) %>%
dplyr::rename("Xm1" = "V6", "Xm2" = "V7", "Xm3" = "V8")
admixK3div = admixK3 %>% mutate(cluster = ifelse(Xm1 >= .97, "Xm1", ifelse(Xm2 >= .97, "Xm2", ifelse(between(Xm1, 0.40, 0.60) & between(Xm2, 0.40, 0.60) , "Hyb", ifelse(between(Xm1, 0.65, 0.85) & between(Xm2, 0.15, 0.35),"Hyb", ifelse(between(Xm2, 0.65, 0.85) & between(Xm1, 0.15, 0.35),"Hyb", "NA")))))) %>% filter(cluster != "NA")
admixK3div %>% group_by(cluster) %>% summarise(n = n())
## # A tibble: 3 × 2
## cluster n
## <chr> <int>
## 1 Hyb 25
## 2 Xm1 59
## 3 Xm2 13
flXestoAdmixK3div = admixK3div %>% filter(sample != "FKX006")
#
# flXestoAdmixK3Melt = melt(flXestoAdmixK3div, id.vars=c("sample", "site", "depth", "depthm"), variable.name="Ancestry", value.name="Fraction")
K2ad = read.table("../data/admix/noClones/K2.output") %>% dplyr::select(V6, V7)
K2ad %>% summarise(sum(V6),sum(V7))
## sum(V6) sum(V7)
## 1 301.7527 49.2473
admixK2 = bams %>%
dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>%
cbind(K2ad) %>%
dplyr::rename("Xm1" = "V6", "Xm2" = "V7")
admixK2div = admixK2 %>% mutate(cluster = ifelse(Xm1 >= 1, "Xm1", ifelse(Xm2 >= 1, "Xm2", ifelse(between(Xm1, 0.40, 0.60) & between(Xm2, 0.40, 0.60) , "Hyb", ifelse(between(Xm1, 0.60, 0.90) & between(Xm2, 0.1, 0.40),"Hyb", ifelse(between(Xm2, 0.60, 0.90) & between(Xm1, 0.1, 0.40),"Hyb", "NA")))))) %>% filter(cluster != "NA")
flXestoAdmixK2 = admixK2div %>% filter(sample %in% flXestoAdmixK3div$sample) %>% arrange(-Xm1)
flXestoAdmixK2$ord = c(1:length(flXestoAdmixK2$sample))
flXestoAdmixK2Melt = melt(flXestoAdmixK2, id.vars=c("sample", "site", "depth", "depthm", "cluster", "ord"), variable.name="Lineage", value.name="Fraction")
admixPlotK2Diva = ggplot(data = flXestoAdmixK2Melt, aes(x = ord, y = Fraction, fill = Lineage, order = ord)) +
geom_bar(stat = "identity", position = "fill", width = 1, colour = "grey25", linewidth = 0.2) +
scale_fill_manual(values = xestoKColPal) +
scale_color_manual(values = rev(flPal)) +
scale_x_discrete(expand = c(0, 0)) +
scale_y_continuous(expand = c(0.001, 0.001)) +
coord_cartesian(ylim = c(0.0, 1.0), clip = "off") +
theme_bw()
admixPlotK2Div = admixPlotK2Diva +
admixTheme +
theme(legend.position = "right")
admixPlotK2Div

div_groups = data.frame("sample" = flXestoAdmixK2$sample, "group" = flXestoAdmixK2$cluster) %>% arrange(sample)
div_groups$sample = gsub(pattern = "SF", replacement = "sf_", div_groups$sample)
div_groups$sample = gsub(pattern = "FK", replacement = "fk_", div_groups$sample)
div_groups$sample = paste(div_groups$sample, ".trim.bt2.bam", sep = "")
div_groups$sample
## [1] "fk_X004.trim.bt2.bam" "fk_X016.trim.bt2.bam" "fk_X043.trim.bt2.bam"
## [4] "fk_X051.trim.bt2.bam" "fk_X056.trim.bt2.bam" "fk_X059.trim.bt2.bam"
## [7] "fk_X064.trim.bt2.bam" "fk_X068.trim.bt2.bam" "fk_X087.trim.bt2.bam"
## [10] "fk_X089.trim.bt2.bam" "fk_X100.trim.bt2.bam" "fk_X114.trim.bt2.bam"
## [13] "fk_X119.trim.bt2.bam" "fk_X122.trim.bt2.bam" "fk_X147.trim.bt2.bam"
## [16] "fk_X184.trim.bt2.bam" "fk_X186.trim.bt2.bam" "fk_X188.trim.bt2.bam"
## [19] "fk_X189.trim.bt2.bam" "fk_X205.trim.bt2.bam" "fk_X211.trim.bt2.bam"
## [22] "fk_X215.trim.bt2.bam" "fk_X219.trim.bt2.bam" "fk_X224.trim.bt2.bam"
## [25] "fk_X227.trim.bt2.bam" "fk_X229.trim.bt2.bam" "fk_X230.trim.bt2.bam"
## [28] "sf_X008.trim.bt2.bam" "sf_X012.trim.bt2.bam" "sf_X015.trim.bt2.bam"
## [31] "sf_X023.trim.bt2.bam" "sf_X024.trim.bt2.bam" "sf_X025.trim.bt2.bam"
## [34] "sf_X026.trim.bt2.bam" "sf_X028.trim.bt2.bam" "sf_X033.trim.bt2.bam"
## [37] "sf_X034.trim.bt2.bam" "sf_X036.trim.bt2.bam" "sf_X037.trim.bt2.bam"
## [40] "sf_X039.trim.bt2.bam" "sf_X040.trim.bt2.bam" "sf_X043.trim.bt2.bam"
## [43] "sf_X061.trim.bt2.bam" "sf_X063.trim.bt2.bam" "sf_X064.trim.bt2.bam"
## [46] "sf_X065.trim.bt2.bam" "sf_X066.trim.bt2.bam" "sf_X067.trim.bt2.bam"
## [49] "sf_X068.trim.bt2.bam" "sf_X071.trim.bt2.bam" "sf_X072.trim.bt2.bam"
## [52] "sf_X073.trim.bt2.bam" "sf_X076.trim.bt2.bam" "sf_X077.trim.bt2.bam"
## [55] "sf_X079.trim.bt2.bam" "sf_X080.trim.bt2.bam" "sf_X083.trim.bt2.bam"
## [58] "sf_X084.trim.bt2.bam" "sf_X086.trim.bt2.bam" "sf_X090.trim.bt2.bam"
## [61] "sf_X093.trim.bt2.bam" "sf_X094.trim.bt2.bam" "sf_X103.trim.bt2.bam"
## [64] "sf_X105.trim.bt2.bam" "sf_X107.trim.bt2.bam" "sf_X110.trim.bt2.bam"
## [67] "sf_X112.trim.bt2.bam" "sf_X113.trim.bt2.bam" "sf_X114.trim.bt2.bam"
## [70] "sf_X117.trim.bt2.bam" "sf_X119.trim.bt2.bam"
write_delim(div_groups, "../data/div_groups", col_names = FALSE)
div_groups %>% select("sample") %>% write_delim(., "../data/div_samples", col_names = FALSE)
maDiv = as.matrix(read.table("../data/xestoDiv.ibsMat"))
divSamples = admixK2div %>% filter(sample %in% flXestoAdmixK3div$sample) %>% select(sample)
dimnames(maDiv) = list(divSamples[,1],divSamples[,1])
trdiv = hclust(dist(maDiv),"ave")
#ggtree(trdiv) + geom_text(aes(label = node), hjust = -.5)
p1div = ggtree(trdiv, layout = "rectangular") +
annotate(geom = "rect", xmin = 0.325, xmax = -.62, ymin = 0.5, ymax = 13.5, fill = xestoKColPal[2], alpha = 0.5, size = .4) +
annotate(geom = "rect", xmin = 0.325, xmax = -.53, ymin = 13.5, ymax = 37.5, fill = xestoKColPal[6], alpha = 0.5, size = .4) +
annotate(geom = "rect", xmin = 0.325, xmax = -.39, ymin = 37.5, ymax = 71.5, fill = xestoKColPal[1], alpha = 0.5, size = .4) +
geom_point2(aes(subset = (node == 73)), shape = 21, size = 4.5, fill=xestoKColPal[6]) +
geom_point2(aes(subset = (node == 86)), shape = 21, size = 4.5, fill = xestoKColPal[1]) +
geom_point2(aes(subset = (node == 74)), shape = 21, size = 4.5, fill = xestoKColPal[2]) +
geom_cladelabel(node = 74, label = "Xm2", barsize = 0, hjust = 8.75) +
geom_cladelabel(node = 86, label = "Xm1", barsize = 0, hjust = 3.85, vjust = 5.1) +
geom_cladelabel(node = 73, label = "Admixed", barsize = 0, hjust = 3.9, vjust = 8.6)
p2div = p1div + geom_fruit(data=admixK2, geom = geom_bar, mapping = aes(x = 1, y = sample, fill = site), stat='identity', linewidth = 0.1, color = "grey25", pwidth = 0.05, offset = 0.72) +
scale_fill_manual("Site", values = flPal, guide = guide_legend(ncol = 2, order = 1)) +
theme(legend.key.size = unit(.8, "line")) +
new_scale_fill()
p3div = p2div + geom_fruit(data=admixK2, geom = geom_bar, mapping = aes(x = 1, y = sample, fill = depth), stat='identity', linewidth = 0.1, color = "grey25", pwidth = 0.05, offset = 2.26) +
scale_fill_manual("Depth zone", values = c("#A7E1BCFF", "#414083FF"), guide = guide_legend(ncol = 2, order = 2)) +
new_scale_fill()
p4div = p3div + geom_fruit(data=admixK2, geom = geom_bar, mapping = aes(x = 1, y = sample, fill = depthm), stat='identity', color = "grey25", linewidth = 0.1, pwidth = 0.05, offset = 5.34) +
scale_fill_viridis_c("Depth (m)", option = "mako", trans = "reverse") +
new_scale_fill()
p5div = p4div + geom_fruit(data = admixK2_melt, geom=geom_bar, mapping = aes(x = -value, y = sample, fill = variable),
stat='identity', color = "grey25", pwidth = 0.45, offset = 11.57, linewidth = 0.1) +
scale_fill_manual("Lineage",values = xestoKColPal[1:5]) +
coord_cartesian(xlim = c(-0.7,0.5)) +
theme_tree(legend.position = c(0.97, 0.5))
# p5div
# p2div = facet_plot(p1div, panel = "location", data=admixK2, geom = geom_tile, mapping = aes(x = 1, fill = site), size = 0.1, color = "grey25") +
# scale_fill_manual("Site", values = flPal, guide = guide_legend(ncol = 2, order = 1)) +
# theme(legend.key.size = unit(.8, "line")) +
# new_scale_fill()
#
# p3div = facet_plot(p2div, panel = "depth zone", data=admixK2, geom = geom_tile, mapping = aes(x = 1, fill = depth), size = 0.1, color = "grey25") +
# scale_fill_manual("Depth zone", values = c("#A7E1BCFF", "#414083FF"), guide = guide_legend(ncol = 2, order = 2)) +
# new_scale_fill()
#
# p4div = facet_plot(p3div, panel = "depth", data=admixK2, geom = geom_tile, mapping = aes(x = 1, fill = depthm), size = 0.1, color = "grey25") +
# scale_fill_viridis_c("Depth (m)", option = "mako", direction = -1, guide = guide_colorbar(order = 2, direction = "horizontal")) +
# theme(legend.title.position = "top") +
# new_scale_fill()
#
# p5div = facet_plot(p4div, panel="K = 2", data = admixK2_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable),
# stat ='identity', size = 0.15, color = "grey25") +
# scale_fill_manual("Lineage",values = xestoKColPal[1:2], guide = guide_legend(ncol = 2, order = 4)) +
# theme(strip.text = element_blank(),
# #legend.direction = "horizontal",
# legend.text = element_text(size = 10),
# legend.title = element_text(size = 10),
# legend.key = element_blank(),
# legend.margin = margin(c(0,0,0,0),unit = "pt"))
covDiv = read.table("../data/pcangsd/flXestoDivPcangsd.cov") %>% as.matrix()
pcaEigDiv = eigen(covDiv)
xestoPcaVarDiv = pcaEigDiv$values/sum(pcaEigDiv$values)*100
head(xestoPcaVarDiv)
## [1] 17.668624 4.996007 1.747479 1.600818 1.570501 1.524815
pcangsdDiv = bams %>% dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>% filter(sample %in% flXestoAdmixK2$sample) %>% left_join(flXestoAdmixK2)
## Joining with `by = join_by(sample, site, depth, depthm)`
pcangsdDiv$PC1 = pcaEigDiv$vectors[,1]
pcangsdDiv$PC2 = pcaEigDiv$vectors[,2]
pcangsdDiv$PC3 = pcaEigDiv$vectors[,3]
pcangsdDiv$PC4 = pcaEigDiv$vectors[,4]
pcangsdDiv = pcangsdDiv %>% mutate(cluster = ifelse(Xm1 >= 0.98, "Xm1", ifelse(Xm2 >= 0.98, "Xm2", "Admixed")))
pcangsdDiv$cluster = as.factor(pcangsdDiv$cluster)
levels(pcangsdDiv$cluster)
## [1] "Admixed" "Xm1" "Xm2"
pcangsdDiv$cluster = factor(pcangsdDiv$cluster, levels = levels(pcangsdDiv$cluster)[c(2, 3, 1)])
Plot PCA
pcaPlot1Diva = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.25) +
geom_vline(xintercept = 0, color = "gray90", size = 0.25) +
geom_point(data = pcangsdDiv, aes(x = PC1, y = PC2, fill = cluster, shape = depth), color = "black", stroke = 0.25, size = 4, show.legend = TRUE) +
scale_fill_manual(name = "Lineage", values = xestoKColPal[c(1,2,6)]) +
scale_shape_manual(name = "Depth zone", values = c(21,23)) +
labs(x = paste0("PC 1 (", format(round(xestoPcaVarDiv[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(xestoPcaVarDiv[2], 1), nsmall = 1), " %)")) +
guides(shape = guide_legend(override.aes = list(size = 3, stroke = 0.25, alpha = NA), order = 2, ncol = 1), fill = guide_legend(override.aes = list(shape = 22, size = 3, fill = xestoKColPal[c(1,2,6)], alpha = NA), order = 1, ncol = 1)) +
theme_bw()
pcaPlot1Div = pcaPlot1Diva +
pcaTheme +
theme(legend.position = c(0.12, 0.25),
legend.key.spacing = unit(2, "pt"),
legend.spacing = unit(0.25, "pt"),
legend.margin = margin(4,2,4,2, unit = "pt"),
legend.background = element_blank())
pcaPlot1Div

xestoAF = read.delim(file = "../data/flXestoAlleleFreq.txt")
xestoAF$chrom = paste(xestoAF$chrom, xestoAF$pos, sep = "_")
xm1mjrAlleles = xestoAF %>% filter(majFreq > 0.85, pop == "Xm1") %>% group_by(chrom, pos)
xm2mnrAlleles = xestoAF %>% filter(minFreq > 0.85, pop == "Xm2") %>% group_by(chrom, pos)
snpList.a = xm2mnrAlleles$chrom
snps.a = xm1mjrAlleles %>% filter(chrom %in% snpList.a) %>% bind_rows(xm2mnrAlleles)
xm2mjrAlleles = xestoAF %>% filter(majFreq > 0.85, pop == "Xm2") %>% group_by(chrom, pos)
xm1mnrAlleles = xestoAF %>% filter(minFreq > 0.85, pop == "Xm1") %>% group_by(chrom, pos)
snpList.b = xm1mnrAlleles$chrom
snps.b = xm2mjrAlleles %>% filter(chrom %in% snpList.b) %>% bind_rows(xm1mnrAlleles)
snps = snps.a %>% bind_rows(snps.b) %>% group_by(chrom) %>% summarise(n = n()) %>% filter(n == 2) %>% select(!n)
write_delim(snps, "../data/snpList")
altSnps = snps %>% left_join(xestoAF) %>% select(chrom, majFreq, pop)
## Joining with `by = join_by(chrom)`
altSnps$pop = factor(altSnps$pop)
altSnps$pop = factor(altSnps$pop, levels = levels(altSnps$pop)[c(1,3,2)])
levels(altSnps$pop) = c("Xm1","Admixed","Xm2")
altSnpsMelt = melt(altSnps, id.vars = c("chrom", "pop"))
divPlot = ggplot() +
geom_tile(data = altSnpsMelt, aes(x = pop, y = chrom, fill = value)) +
# ggplot2::geom_text(data = altSnpsMelt, aes(x = pop, y = chrom, label = round(value, 2))) + # scale_fill_gradientn(colors = rev(c("#645A9FFF", "#B696D6FF", "#F6E2FBFF")))
scale_fill_gradientn(name = "Major allele \nfrequency",colors = c("#3FB8AFFF", "#ADCEA9", "#DAD8A7", "#E4AB9B","#FF3D7FFF")) +
labs(x = "Lineage", y = "SNP locus") +
theme(axis.text.y = element_blank(),
axis.text.x = element_text(angle = 0, color = "black", size = 10),
axis.ticks = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
plot.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
legend.key.size = unit(.8, "line"))
# ftl.snps.vcf = snps
# ftl.snps.vcf$pos = ftl.snps.vcf$chrom
# ftl.snps.vcf$chrom = gsub("\\_.*", "", ftl.snps.vcf$chrom)
# ftl.snps.vcf$pos = gsub(".*_", "", ftl.snps.vcf$pos)
#
# # write_delim(ftl.snps.vcf, "../data/ftl/ftlSnpsVcf", col_names = FALSE)
Divergent snps heterozygosities
divHet = read.delim("../data/divHetOut", header = FALSE, sep = " ") %>% rename("het" = "V2") %>% cbind(lineage = div_groups$group, sample = div_groups$sample) %>% select(-V1)
divHet$lineage = as.factor(divHet$lineage)
divHet$lineage = factor(divHet$lineage, levels = levels(divHet$lineage)[c(2,1,3)])
levels(divHet$lineage) = c("Xm1", "Admixed", "Xm2")
divAOV = welch_anova_test(divHet, het ~ lineage)
divPH = games_howell_test(divHet, het ~ lineage)
divPH$p.adj
## [1] 0.0000543 0.8120000 0.0000318
hetLetters = data.frame(x = factor(c("Xm1", "Admixed", "Xm2")), y = c(1, 1, 1), grp = c("a", "b", "a"))
divStat = divAOV$statistic
hetPlotDiv = ggplot(data = divHet, aes(x = lineage, y = het)) +
geom_violin(aes(fill = lineage, group = lineage), adjust = 1, linewidth = 0.4, color = "black", alpha = 1, trim = TRUE, scale = "area", width = 1.2) +
geom_boxplot(aes(fill = lineage, group = lineage), width = 0.2, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.4, alpha = 0.5) + xlab("Lineage") +
geom_text(data = hetLetters, aes(x = x, y = y, label = grp), size = 4) +
annotate(geom = "text", x = 2.9, y =0.85, label = sprintf("italic(F) == '%0.2f' * ';' ~ italic(p) < 0.001", divAOV$statistic), size = 3, parse = TRUE) +
scale_fill_discrete(type = xestoKColPal[c(1, 6, 2)], name = "Lineage") +
xlab("Lineage") +
ylab("Heterozygosity") +
coord_cartesian(expand = TRUE, xlim = c(0.85, 3.15)) +
theme_bw() +
violinTheme
hetPlotDiv

combined plots:
xmDiv = ((p5div) /((pcaPlot1Div + hetPlotDiv + divPlot) + plot_layout(widths = c(3, 2, 1)))) + plot_layout(heights = c(2, 1.5)) +
# plot_layout(design = layout) +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = 20),
legend.text = element_text(size = 10),
legend.title = element_text(size = 10),
legend.spacing = unit(-5, "pt"),
legend.key = element_blank(),
legend.background = element_blank())
xmDiv

# ggsave("../figures/figure6.png", plot = xmDiv, height = 9, width = 12, units = "in", dpi = 300)
ggsave("../figures/figure6.svg", plot = xmDiv, height = 9, width = 12, units = "in", dpi = 300)
Genetic connectivity
Migration
mainBams = read.delim("../data/bamsMainCluster", header = FALSE) %>% rename("sample" = "V1")
mainBams$sample = gsub("\\.trim.bt2.bam", "", mainBams$sample)
mainBams$sample = gsub("fk_", "FK", mainBams$sample)
mainBams$sample = gsub("sf_", "SF", mainBams$sample)
xestoMainPops = mainBams %>% left_join(bams) %>% mutate("site" = paste(region,depthZone)) %>% select(sample, site)
## Joining with `by = join_by(sample)`
xestoMainPops$site = gsub(" ", "", xestoMainPops$site)
xestoMainPops$site = gsub("'", "", xestoMainPops$site)
xestoMainPops$sample = gsub("FK", "fk_", xestoMainPops$sample)
xestoMainPops$sample = gsub("SF", "sf_", xestoMainPops$sample)
xestoMainPops$sample = paste(xestoMainPops$sample, ".trim.bt2.bam", sep ="")
write_delim(xestoMainPops, "../data/xestoMainPops", col_names = FALSE)
Checking deviance among model runs from BayesAss we ran on HPC
# fileList = substr(list.files("../data/snps/bayesAss/", "BA3trace.*.txt$"),1,10)
fileList = substr(list.files("../data/bayesass/", "BA3trace.*.txt$"),1,11)
bayesian_deviance <- function(trace, burnin = 0, sampling.interval = 0){
if(burnin == 0) stop('No burnin specified')
if(sampling.interval == 0) stop('No sampling interval specified')
range <- (trace$State > burnin & trace$State %% sampling.interval == 0)
D <- -2*mean(trace$LogProb[range])
return(D)
}
baRuns = setNames(data.frame(matrix(ncol = 2, nrow = length(fileList))), c("file", "bD"))
for(i in 1:length(fileList)){
assign(fileList[i], read.delim(paste("../data/bayesass/", fileList[i], ".txt", sep = ""))) %>% dplyr::select(-last_col())
baRuns$file[i] = fileList[i]
baRuns$bD[i] = (bayesian_deviance(get(fileList[i]), burnin = 2000000, sampling.interval = 1000))
}
bestBA = baRuns %>% dplyr::filter(bD == min(bD)) %>% select(file) %>% as.list()
#[1] "BA3trace.01 4277982.0575"
#[1] "BA3trace.02 4278000.6675"
#[1] "BA3trace.03 4277371.5925"
#[1] "BA3trace.04 4274620.0525"
#[1] "BA3trace.05 4278321.1175"
#[1] "BA3trace.06 4277084.2950"
#[1] "BA3trace.07 4276766.8200"
#[1] "BA3trace.08 4277712.3800"
#
bestBA
## $file
## [1] "BA3trace.04"
All traces have similar deviance (this is good!). Using the trace
with the lowest deviance (BA3trace.04, in this case)
bayesAss = read.delim(paste("../data/bayesass/",bestBA,".txt", sep = "")) %>% filter(State > 2000000) %>% dplyr::select(-State, -LogProb, -X)
baMean = bayesAss %>% summarise(across(everything(), list(mean))) %>% t() %>% as_tibble() %>% rename(., mean=V1) %>% mutate(pops = colnames(bayesAss))
baSumm = bayesAss %>% summarise(across(everything(), list(median))) %>% t() %>% as.data.frame() %>% rename(., median=V1) %>% mutate(pops = baMean$pops, mean = round(baMean$mean, 3)) %>% relocate(median, .after = mean)
baSumm$median = round(baSumm$median, 3)
baHpd =as.data.frame(t(sapply(bayesAss, emp.hpd)))
colnames(baHpd) = c("hpdLow", "hpdHigh")
baHpd$pops = rownames(baHpd)
ESS = as.data.frame(sapply(bayesAss, ESS))
colnames(ESS) = "ESS"
baSumm = baSumm %>% left_join(baHpd)
## Joining with `by = join_by(pops)`
baSumm$hpdLow = round(baSumm$hpdLow, 3)
baSumm$hpdHigh = round(baSumm$hpdHigh, 3)
baSumm$ESS = ESS$ESS
### FROM BAYESASS: ###
## Population Index -> Population Label:
#0->UpperKeysMesophotic 1->UpperKeysShallow
#2->LowerKeysShallow 3->TortugasBankMesophotic
#4->TortugasBankShallow 5->RileysHumpMesophotic
#6->RileysHumpShallow 7->LowerKeysMesophotic
#8->WestPalmShallow 9->JupiterShallow
#10->Ft.LauderdaleShallow 11->BoyntonShallow
popi = rep(c("Upper Keys\nMesophotic", "Upper Keys\nShallow", "Lower Keys\nShallow", "Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow", "Lower Keys\nMesophotic", "West Palm\nShallow", "Jupiter\nShallow", "Ft. Lauderdale\nShallow", "Boynton\nShallow"), each = 12)
popj = rep(c("Upper Keys\nMesophotic", "Upper Keys\nShallow", "Lower Keys\nShallow", "Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow", "Lower Keys\nMesophotic", "West Palm\nShallow", "Jupiter\nShallow", "Ft. Lauderdale\nShallow", "Boynton\nShallow"), times = 12)
baSumm = baSumm %>% mutate(pop.i = popi, pop.j = popj) %>% relocate(c(pop.i, pop.j), .after = pops) %>% dplyr::select(-pops)
baSumm$pop.i = factor(baSumm$pop.i)
baSumm$pop.i = factor(baSumm$pop.i, levels = levels(baSumm$pop.i)[c(3, 12, 1, 2, 11, 5, 9, 7, 10, 4, 8, 6)])
baSumm$pop.j = factor(baSumm$pop.j)
baSumm$pop.j = factor(baSumm$pop.j, levels = levels(baSumm$pop.j)[c(3, 12, 1, 2, 11, 5, 9, 7, 10, 4, 8, 6)])
baSumm$site.i = word(baSumm$pop.i, 1, sep = "\n")
baSumm$site.i = factor(baSumm$site.i)
baSumm$site.i = factor(baSumm$site.i, levels = levels(baSumm$site.i)[c(3, 8, 1, 2, 7, 4, 6, 5)])
baSumm$site.j = word(baSumm$pop.j, 1, sep = "\n")
baSumm$site.j = factor(baSumm$site.j)
baSumm$site.j = factor(baSumm$site.j, levels = levels(baSumm$site.j)[c(3, 8, 1, 2, 7, 4, 6, 5)])
baSumm$depth.i = word(baSumm$pop.i, 2, sep = "\n")
baSumm$depth.i = factor(baSumm$depth.i)
baSumm$depth.i = factor(baSumm$depth.i, levels = levels(baSumm$depth.i)[c(2, 1)])
baSumm$depth.j = word(baSumm$pop.j, 2, sep = "\n")
baSumm$depth.j = factor(baSumm$depth.j)
baSumm$depth.j = factor(baSumm$depth.j, levels = levels(baSumm$depth.j)[c(2, 1)])
#All sites (excluding self retention)
baMeans = baSumm %>% filter(pop.i != pop.j) %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Global")
#mesophotic sources
baMeans = baSumm %>% filter(pop.i != pop.j, depth.j == "Mesophotic") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic Source") %>% bind_rows(baMeans, .)
#shallow sources
baMeans = baSumm %>% filter(pop.i != pop.j, depth.j == "Shallow") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Shallow Source") %>% bind_rows(baMeans, .)
#mesophotic sinks
baMeans = baSumm %>% filter(pop.i != pop.j, depth.i == "Mesophotic") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic Sink") %>% bind_rows(baMeans, .)
#shallow sinks
baMeans = baSumm %>% filter(pop.i != pop.j, depth.i == "Shallow") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Shallow Sink") %>% bind_rows(baMeans, .)
#mesophotic -> shallow
baMeans = baSumm %>% filter(pop.i != pop.j, depth.j == "Mesophotic", depth.i == "Shallow") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic -> Shallow") %>% bind_rows(baMeans, .)
#mesophotic -> mesophotic
baMeans = baSumm %>% filter(pop.i != pop.j, depth.j == "Mesophotic", depth.i == "Mesophotic") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic -> Mesophotic") %>% bind_rows(baMeans, .)
#shallow -> mesophotic
baMeans = baSumm %>% filter(pop.i != pop.j, depth.j == "Shallow", depth.i == "Mesophotic") %>% summarise(mean = mean(.$mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Shallow -> Mesophotic") %>% bind_rows(baMeans, .)
#shallow -> shallow
baMeans = baSumm %>% filter(pop.i != pop.j, depth.j == "Shallow", depth.i == "Shallow") %>% summarise(mean = round(mean(.$mean), 5), sd = round(sd(.$mean), 5), se = round(sd(.$mean)/sqrt(nrow(.)), 3)) %>% mutate(dataset = paste("Shallow -> Shallow")) %>% bind_rows(baMeans, .) %>% relocate(dataset, .before = mean) %>% as.data.frame()
baMeans[,c(2:4)] = baMeans[,c(2:4)] %>% round(4)
baMeans
## dataset mean sd se
## 1 Global 0.0159 0.0173 0.0015
## 2 Mesophotic Source 0.0158 0.0215 0.0032
## 3 Shallow Source 0.0160 0.0149 0.0016
## 4 Mesophotic Sink 0.0177 0.0176 0.0027
## 5 Shallow Sink 0.0150 0.0172 0.0018
## 6 Mesophotic -> Shallow 0.0163 0.0243 0.0043
## 7 Mesophotic -> Mesophotic 0.0148 0.0115 0.0033
## 8 Shallow -> Mesophotic 0.0188 0.0195 0.0034
## 9 Shallow -> Shallow 0.0143 0.0114 0.0020
baMeansTabPub = baMeans %>%
flextable() %>%
flextable::compose(part = "header", j = "dataset", value = as_paragraph("Dataset")) %>%
flextable::compose(part = "header", j = "mean", value = as_paragraph(as_i("m"))) %>%
flextable::compose(part = "header", j = "sd", value = as_paragraph("SD")) %>%
flextable::compose(part = "header", j = "se", value = as_paragraph("SEM")) %>%
flextable::font(fontname = "Times New Roman", part = "all") %>%
flextable::fontsize(size = 10, part = "all") %>%
flextable::bold(part = "header") %>%
flextable::align(align = "left", part = "all") %>%
flextable::autofit()
table1 = read_docx()
table1 = body_add_flextable(table1, value = baMeansTabPub)
print(table1, target = "../tables/table1.docx")
baMeansTabPub
Dataset | m | SD | SEM |
|---|
Global | 0.0159 | 0.0173 | 0.0015 |
Mesophotic Source | 0.0158 | 0.0215 | 0.0032 |
Shallow Source | 0.0160 | 0.0149 | 0.0016 |
Mesophotic Sink | 0.0177 | 0.0176 | 0.0027 |
Shallow Sink | 0.0150 | 0.0172 | 0.0018 |
Mesophotic -> Shallow | 0.0163 | 0.0243 | 0.0043 |
Mesophotic -> Mesophotic | 0.0148 | 0.0115 | 0.0033 |
Shallow -> Mesophotic | 0.0188 | 0.0195 | 0.0034 |
Shallow -> Shallow | 0.0143 | 0.0114 | 0.0020 |
baSumm$mean = sprintf('%.3f', baSumm$mean)
baSumm$mean2 = baSumm$mean
baSumm$hpdLow = sprintf('%.3f', baSumm$hpdLow)
baSumm$hpdHigh = sprintf('%.3f', baSumm$hpdHigh)
baLabs = tibble(pop.i = unique(baSumm$pop.i), pop.j = unique(baSumm$pop.j))
migrateA = ggplot(data = baSumm, aes(pop.i, pop.j))+
geom_tile(data = subset(baSumm, subset = baSumm$mean2>0.65), fill = "gray35", color = "white") +
geom_segment(data = baSumm, aes(x = 0.4755, xend = -0.55, y = pop.j, yend = pop.j, color = pop.j), size = 16) +
geom_segment(data = baSumm, aes(x = pop.i, xend = pop.i, y = 0.45, yend = -0.425, color = pop.i), size = 32) +
scale_color_manual(values = flPal[c(1:8, 5:8)], guide = NULL) +
guides(fill = guide_colorbar(ticks.colour = "black", barwidth = 1, barheight = 10, frame.colour = "black")) +
geom_tile(data = subset(baSumm, subset = baSumm$mean<0.65), aes(fill = as.numeric(as.character(mean))), color = "white") +
scale_fill_gradientn(colours = paletteer_c("viridis::mako", n = 10, direction = -1)[c(1:7)], limit = c(0,0.16), space = "Lab", name = expression(paste(italic("m"))), na.value = "transparent", guide = "colourbar", values = c(0, 0.05, 0.1, 0.15, 0.2,0.5,0.75,1)) +
geom_text(data = baSumm, aes(x = pop.i, y = pop.j, label = paste(mean, "\n", sep = "")), color = ifelse(baSumm$mean > 0.6, "white", "gray5"), fontface = ifelse(as.numeric(baSumm$hpdLow)>0, "bold", "plain"), size = ifelse(as.numeric(baSumm$hpdLow)>0, 4.75, 4)) +
geom_text(data = baSumm, aes(x = pop.i, y = pop.j, label = paste("\n(",hpdLow,"–",hpdHigh, ")", sep = "")), color = ifelse(baSumm$mean > 0.6, "white", "gray5"), size = 3.25) +
geom_text(data = (baLabs %>% filter(pop.j %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), x = -.02, aes(y = pop.j, label = pop.j), size = 3.75, color = "#FFFFFF", family = "sans") +
geom_text(data = (baLabs %>% filter(!pop.j %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), x = -.02, aes(y = pop.j, label = pop.j), size = 3.75, color = "#000000", family = "sans") +
geom_text(data = (baLabs %>% filter(pop.i %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), y = -.01, aes(x = pop.i, label = pop.i), size = 3.75, color = "#FFFFFF", family = "sans") +
geom_text(data = (baLabs %>% filter(!pop.i %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), y = -.01, aes(x = pop.i, label = pop.i), size = 3.75, color = "#000000", family = "sans") +
labs(x = "Sink", y = "Source") +
scale_y_discrete(limits = rev(levels(baSumm$pop.i))[c(1:12)], position = "left") +
coord_cartesian(xlim = c(1, 12), ylim = c(1, 12), clip = "off") +
theme_minimal()
migrate = migrateA + theme(
axis.text.x = element_text(vjust = 1, size = 12, hjust = 0.5, color = NA),
axis.text.y = element_text(size = 10, color = NA),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
panel.grid.major = element_blank(),
axis.ticks = element_blank(),
# legend.position = c(1.055, 0.5),
legend.direction = "vertical",
legend.title = element_text(size = 12, face = "bold")
)
# migrate
ggsave("../figures/figure7.png", plot = migrate, width = 36, height = 18, units = "cm", dpi = 300)

Main cluster
maMain = as.matrix(read.table("../data/xestoMain.ibsMat"))
bamsMain = read.delim("../data/pcangsd/mainCluster", header = FALSE) %>% rename("sample" = "V1") %>% left_join(bams) %>% dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM)
## Joining with `by = join_by(sample)`
dimnames(maMain) = list(bamsMain[,1],bamsMain[,1])
pcAdmixMain = read.table("../data/admix/main/K3main.output") %>% dplyr::select(V6, V7, V8) %>% dplyr::rename("Xm1" = "V6", "Xm1.2" = "V7", "Xm1.3" = "V8") %>% cbind(bamsMain)
pcAdmixMainMelt = melt(pcAdmixMain, id.vars = c("sample", "site", "depth", "depthm"))
trMain = hclust(dist(maMain),"ave")
p1Main = ggtree(trMain, layout = "rectangular") #+ ggtree::geom_tiplab()
p2Main = facet_plot(p1Main, panel = "location", data=bamsMain, geom = geom_tile, mapping = aes(x = 1, fill = site), size = 0.1, color = "grey25") +
scale_fill_manual("Site", values = flPal, guide = guide_legend(ncol = 1, order = 1)) +
theme(legend.key.size = unit(.8, "line")) +
new_scale_fill()
p3Main = facet_plot(p2Main, panel = "depth zone", data=bamsMain, geom = geom_tile, mapping = aes(x = 1, fill = depth), size = 0.1, color = "grey25") +
scale_fill_manual("Depth zone", values = c("#A7E1BCFF", "#414083FF"), guide = guide_legend(ncol = 1, order = 2)) +
new_scale_fill()
p4Main = facet_plot(p3Main, panel = "depth", data=bamsMain, geom = geom_tile, mapping = aes(x = 1, fill = depthm), size = 0.1, color = "grey25") +
scale_fill_viridis_c("Depth (m)", option = "mako", direction = -1, guide = guide_colorbar(order = 2, direction = "horizontal")) +
theme(legend.title.position = "top") +
new_scale_fill()
p5Main = facet_plot(p4Main, panel="K = 2", data = pcAdmixMainMelt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable),
stat ='identity', size = 0.15, color = "grey25") +
scale_fill_manual("Lineage",values = xestoKColPal[c(1,3,4)], guide = guide_legend(ncol = 1, order = 4)) +
theme(strip.text = element_blank(),
#legend.direction = "horizontal",
legend.text = element_text(size = 10),
legend.title = element_text(size = 10),
legend.key = element_blank(),
legend.margin = margin(c(0,0,0,0),unit = "pt"))
p5Main

mainTree = facet_widths(p5Main, widths = c(0.25, 0.025, 0.025, 0.025, 0.25))
covMain = read.table("../data/pcangsd/flXestoMainPcangsd.cov") %>% as.matrix()
pcAdmixMain2 = read.table("../data/admix/main/K3main.output") %>% dplyr::select(V6, V7, V8)
pcAdmixMain2 = pcAdmixMain2 %>% dplyr::rename("Xm1" = "V6", "Xm1.2" = "V7", "Xm1.3" = "V8")
pcaEigMain = eigen(covMain)
xestoMainPcaVar = pcaEigMain$values/sum(pcaEigMain$values)*100
head(xestoMainPcaVar)
## [1] 3.098399 2.275836 1.576540 1.541056 1.439801 1.368856
pcangsdMain = read.delim("../data/pcangsd/mainCluster", header = FALSE) %>% rename("sample" = "V1") %>% left_join(bams) %>% dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>% cbind(pcAdmixMain2)
## Joining with `by = join_by(sample)`
pcangsdMain$sitedepth = as.factor(paste(pcangsdMain$site, pcangsdMain$depth, sep = " "))
pcangsdMain$depth = factor(pcangsdMain$depth)
pcangsdMain$depth = factor(pcangsdMain$depth, levels = levels(pcangsdMain$depth)[c(2,1)])
pcangsdMain$sitedepth = factor(pcangsdMain$sitedepth)
pcangsdMain$sitedepth = factor(pcangsdMain$sitedepth, levels(pcangsdMain$sitedepth)[c(3, 12, 1, 2, 11, 10, 5, 4, 9, 8, 7, 6)])
pcangsdMain$depth = factor(pcangsdMain$depth)
pcangsdMain$depth = factor(pcangsdMain$depth, levels(pcangsdMain$depth)[c(2, 1)])
pcangsdMain$PC1 = pcaEigMain$vectors[,1]
pcangsdMain$PC2 = pcaEigMain$vectors[,2]
pcangsdMain$PC3 = pcaEigMain$vectors[,3]
pcangsdMain$PC4 = pcaEigMain$vectors[,4]
pcangsdClustMain = pcAdmixMain2 %>% mutate(cluster = ifelse(Xm1 >=0.75, "Xm1", ifelse(Xm1.2 >= 0.75, "Xm1.2", ifelse(Xm1.3 >= 0.75, "Xm1.3", "Admixed"))))
pcangsdMain = pcangsdMain %>% mutate(cluster = pcangsdClustMain$cluster)
pcangsdMain = merge(pcangsdMain, aggregate(cbind(mean.x = PC1, mean.y = PC2)~sitedepth, pcangsdMain, mean), by="sitedepth")
pcangsdMain$cluster = factor(pcangsdMain$cluster)
pcangsdMain$cluster = factor(pcangsdMain$cluster, levels = levels(pcangsdMain$cluster)[c(2,3,4,1)])
pcaPlotMainSA = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.25) +
geom_vline(xintercept = 0, color = "gray90", size = 0.25) +
geom_point(data = pcangsdMain, aes(x = PC1, y = PC2, fill = site, shape = depth), color = "black", stroke = 0.25, size = 4, alpha = 0.5, show.legend = FALSE) +
geom_point(data = pcangsdMain, aes(x = mean.x, y = mean.y, fill = site, shape = depth), color = "black", size = 5, alpha = 1, stroke = 0.5) +
scale_shape_manual(values = c(21, 23), name = "Depth Zone") +
scale_fill_manual(values = flPal, name = "Site") +
scale_color_manual(values = flPal, name = "Site") +
labs(x = paste0("PC 1 (", format(round(xestoMainPcaVar[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(xestoMainPcaVar[2], 1), nsmall = 1), " %)")) +
guides(shape = guide_legend(override.aes = list(size = 4, stroke = 0.25, alpha = NA), order = 2, ncol = 1), fill = guide_legend(override.aes = list(shape = 22, size = 4, fill = flPal, alpha = NA), order = 1, ncol = 1)) +
theme_bw()
pcaPlotMainS = pcaPlotMainSA +
pcaTheme +
theme(legend.position = c(0.15, 0.24))
pcaPlotMainS

pcaPlotMainK3A = ggplot() +
geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
geom_point(data = pcangsdMain, aes(x = PC1, y = PC2, fill = cluster, shape = depth), color = "black", size = 4, alpha = 1, show.legend = TRUE) +
scale_shape_manual(values = c(21, 23), name = "Depth Zone") +
scale_fill_manual(values = xestoKColPal[c(1,3,4,6)], name = "Lineage") +
labs(x = paste0("PC 1 (", format(round(xestoMainPcaVar[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(xestoMainPcaVar[2], 1), nsmall = 1), " %)")) +
guides(shape = "none", fill = guide_legend(override.aes = list(shape = 22, size = 4, fill = xestoKColPal[c(1,3,4,6)], alpha = NA), order = 1, ncol = 1))+
theme_bw()
pcaPlotMainK3 = pcaPlotMainK3A +
pcaTheme +
theme(legend.position = c(0.12, 0.15))
pcaPlotMainK3

pcAdmixMain3 = arrange(pcAdmixMain, site, depth, -Xm1, -Xm1.2)
popCounts = pcAdmixMain3 %>% group_by(site, depth) %>% summarise(n = n())
## `summarise()` has grouped output by 'site'. You can override using the `.groups`
## argument.
pcAdmixMain3$site = factor(pcAdmixMain3$site, levels = rev(levels(pcAdmixMain3$site)))
popCounts
## # A tibble: 12 × 3
## # Groups: site [8]
## site depth n
## <fct> <fct> <int>
## 1 Jupiter Shallow 27
## 2 West Palm Shallow 29
## 3 Boynton Shallow 28
## 4 Ft. Lauderdale Shallow 9
## 5 Upper Keys Shallow 27
## 6 Upper Keys Mesophotic 28
## 7 Lower Keys Shallow 28
## 8 Lower Keys Mesophotic 30
## 9 Tortugas Bank Shallow 29
## 10 Tortugas Bank Mesophotic 15
## 11 Riley's Hump Shallow 26
## 12 Riley's Hump Mesophotic 24
popCountList = reshape2::melt(lapply(popCounts$n,function(x){c(1:x)}))
pcAdmixMain3$ord = popCountList$value
pcAdmixMain3Melt = melt(pcAdmixMain3, id.vars=c("sample", "site", "depth", "depthm", "ord"), variable.name="Ancestry", value.name="Fraction")
pcAdmixMain3Melt$Ancestry = factor(pcAdmixMain3Melt$Ancestry)
pcAdmixMain3Melt$Ancestry = factor(pcAdmixMain3Melt$Ancestry, levels = rev(levels(pcAdmixMain3Melt$Ancestry)))
popAnno = data.frame(x1 = c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5), x2 = c(30.5, 30.5, 30.5, 30.5, 30.5, 30.5, 30.5, 30.5),
y1 = -.1, y2 = -.1, sample = NA, Ancestry = NA, depth = "Shallow",
ord = NA, Fraction = NA,
site = c("Riley's Hump", "Tortugas Bank",
"Lower Keys", "Upper Keys", "Ft. Lauderdale", "Boynton", "West Palm", "Jupiter"))
popAnno$site = factor(popAnno$site)
popAnno$site = factor(popAnno$site, levels = levels(popAnno$site)[rev(c(3, 8, 1, 2, 7, 4, 6, 5))])
Make admixture plots
mainAdmixPlotA = ggplot(data = pcAdmixMain3Melt, aes(x = ord, y = Fraction, fill = Ancestry, order = sample)) +
geom_segment(data = popAnno, aes(x = x1, xend = x2, y = -1.11, yend = -1.11, color = site), size = 7) +
geom_bar(stat = "identity", position = "fill", width = 1, colour = "grey25", size = 0.2) +
facet_grid(factor(depth) ~ site, switch = "both") +
geom_text(data = (pcAdmixMain3Melt %>% filter(depth == "Mesophotic", site %in% c("Riley's Hump", "Tortugas Bank"), sample %in% c("FKX047", "FKX070"), Ancestry == "Xm1")), x = 15.5, y = -.1, aes(label = site), size = 4, color = "#FFFFFF") +
geom_text(data = (pcAdmixMain3Melt %>% filter(depth == "Shallow", !site %in% c("Riley's Hump", "Tortugas Bank"), sample %in% c("FKX148", "FKX208", "SFX069", "SFX093", "SFX021","SFX035"), Ancestry == "Xm1")), x = 15.5, y = -1.11, aes(label = site), size = 4, color = "#000000") +
scale_fill_manual(values = xestoKColPal[c(1,3,4)]) +
scale_color_manual(values = rev(flPal)) +
scale_x_discrete(expand = c(0.005, 0.005)) +
scale_y_continuous(expand = c(0.001, 0.001)) +
coord_cartesian(ylim = c(0.0, 1.0), clip = "off") +
theme_bw()
mainAdmixPlot = mainAdmixPlotA +
theme_bw()+
theme(
panel.grid = element_blank(),
plot.background = element_blank(),
panel.border = element_rect(fill = NA, color = "black", size = 0.75, linetype = "solid"),
panel.spacing.x = grid:::unit(0.05, "lines"),
panel.spacing.y = grid:::unit(0.05, "lines"),
panel.background = element_blank(),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
axis.title = element_blank(),
strip.background.x = element_blank(),
strip.background.y = element_blank(),
strip.text = element_text(size = 8),
strip.text.y.left = element_text(size = 10, angle = 90),
strip.text.x.bottom = element_text(vjust = 1, color = NA),
legend.key = element_blank(),
legend.position = "none",
legend.title = element_text(size = 8))
mainAdmixPlot

mainPlots = (mainTree)/((pcaPlotMainS + pcaPlotMainK3 + guide_area()) +
plot_layout(widths = c(1, 1,0.25), guides = "collect"))/(mainAdmixPlot) +
plot_layout(heights = c(2.5, 1.75, 2)) +
plot_annotation(tag_levels = 'A') &
theme(plot.tag = element_text(size = 20),
legend.text = element_text(size = 10),
legend.title = element_text(size = 10),
legend.background = element_blank(),
legend.position = )
mainPlots

ggsave("../figures/figureS1.png", plot = mainPlots, height = 14, width = 12, units = "in", dpi = 300)
save.image("flXesto.RData")